*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

* Authors: Ranjit Lall & David Vilalta

* Paper: Varieties of Anxieties: Disaggregating Emotion and Voting Behavior in the COVID-19 Era

* Purpose: Cleans and formats dataset used for the electoral analysis in Madrid

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

clear all 

* Insert folder path
global folder ".../Insert folder path"

* Set working directory for data files
cd "${folder}"

* Define local folders
global dta "Final_data/"
global tables "Results/Tables/"
global figures "Results/Figures/"
global interm_files "Raw_data/Intermediate_files/Madrid_Election/"
global madrid_files "Raw_data/Madrid_Election/"


* Import and save Madrid election results
import excel "${madrid_files}/Madrid 2015 Mun Results.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/madridelection_2015", replace

import excel "${madrid_files}/Madrid 2019 Mun Results.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/madridelection_2019", replace
drop Year
gen Year=2021

rename vox vox_2015
rename pp pp_2015
rename Cs Cs_2015 
rename psoe psoe_2015
rename podemos podemos_2015
rename masmadrid masmadrid_2015
rename turnout turnout_2015

save "${interm_files}/madridelection_2019_2015", replace

import excel "${madrid_files}/Madrid 2019 Mun Results.xlsx", sheet("Formatted") firstrow clear
merge 1:1 MUN GEOCODIGO Year using "${interm_files}/madridelection_2015"
drop _merge
save "${interm_files}/madridelection_2019", replace

import excel "${madrid_files}/Madrid 2021 Mun Results.xlsx", sheet("Formatted") firstrow clear
merge 1:1 MUN GEOCODIGO Year using "${interm_files}/madridelection_2019_2015"
drop _merge
save "${interm_files}/madridelection", replace

* Import and save additional data files
import excel "${madrid_files}/MAD_Agriculture.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/MAD_agriculture", replace

import excel "${madrid_files}/MAD_Agriculture2.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/MAD_agriculture2", replace

import excel "${madrid_files}/MAD_Agriculture3.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/MAD_agriculture3", replace

import excel "${madrid_files}/MAD_Distance.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/MAD_distance", replace

import excel "${madrid_files}/Madrid_Latest_Merged_Data.xlsx", sheet("Formatted_Waves") firstrow clear
save "${interm_files}/covid_waves", replace

import excel "${madrid_files}/Viento_formatted_mad_6m.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/mad_viento_6m", replace

import excel "${madrid_files}/Precipitacion_formatted_mad_6m.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/mad_rain_6m", replace

import excel "${madrid_files}/.temperatura_formatted_mad_6m.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/mad_temp_6m", replace

* Filter cumulative cases for Madrid election
use "${interm_files}/cumulative_cases", clear
keep if PROV==28
keep if date==tm(2021m4)
save "${interm_files}/cumulative_cases_MAD_ELECTION", replace

* Merge Madrid election data with external data
use "${interm_files}/madridelection", clear

merge 1:1 MUN using "${interm_files}/cumulative_cases_MAD_ELECTION"
drop _merge

merge 1:1 MUN using "${interm_files}/covid_waves"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/MAD_agriculture"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/MAD_agriculture2"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/MAD_agriculture3"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/MAD_distance"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/mad_viento_6m"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/mad_temp_6m"
drop _merge

merge 1:1 GEOCODIGO using "${interm_files}/mad_rain_6m"
drop _merge

merge m:1 PROV MUN using "${interm_files}/population"
drop _merge

merge m:1 PROV MUN using "${interm_files}/nursingdata"
drop _merge

drop if Year==.

gen nursinghomes_P=nursinghomes/POB20
gen nursingplaces_P=nursingplaces/POB20
gen log_nursingplaces=log(nursingplaces+1)

save "${interm_files}/madridelection_2021", replace

append using "${interm_files}/madridelection_2019"

replace cumulative_cases_m_P=0 if Year==2019
replace cumulative_cases=0 if Year==2019

gen sp_govt=psoe+podemos
gen mad_govt=pp+Cs
gen sp_oppo=vox+pp+Cs
gen mad_oppo=psoe+podemos
gen proconf=psoe+podemos+masmadrid+Cs
gen anticonf=pp+vox
gen proconf_placebo=psoe_2015+podemos_2015+masmadrid_2015+Cs_2015
gen anticonf_placebo=pp_2015+vox_2015

xtset MUN Year

gen D_vox=vox-L2.vox
gen D_pp=pp-L2.pp
gen D_Cs=Cs-L2.Cs
gen D_psoe=psoe-L2.psoe
gen D_masmadrid=masmadrid-L2.masmadrid
gen D_podemos=podemos-L2.podemos
gen D_sp_govt=sp_govt-L2.sp_govt
gen D_mad_govt=mad_govt-L2.mad_govt
gen D_sp_oppo=sp_oppo-L2.sp_oppo
gen D_mad_oppo=mad_oppo-L2.mad_oppo
gen D_proconf=proconf-L2.proconf
gen D_anticonf=anticonf-L2.anticonf
gen D_turnout=turnout-L2.turnout
gen D_proconf_placebo=proconf_placebo-L2.proconf_placebo
gen D_anticonf_placebo=anticonf_placebo-L2.anticonf_placebo

save "${interm_files}/Mad_electoral_data", replace

import excel "${madrid_files}/MAD_UNEM.xlsx", sheet("Formatted") firstrow clear
replace Year=2021 if Year==2020
save "${interm_files}/mad_unem", replace

import excel "${madrid_files}/MAD_GENDER.xlsx", sheet("Formatted") firstrow clear
replace Year=2021 if Year==2020
save "${interm_files}/mad_gender", replace

import excel "${madrid_files}/MAD_GENDER.xlsx", sheet("POP Formatted") firstrow clear
replace Year=2021 if Year==2020
save "${interm_files}/mad_pop", replace

import excel "${madrid_files}/MAD_HOTELPLACES.xlsx", sheet("Formatted") firstrow clear
replace Year=2021 if Year==2020
save "${interm_files}/mad_hotelplaces", replace

import excel "${madrid_files}/MAD_AGE.xlsx", sheet("Formatted") firstrow clear
replace Year=2021 if Year==2020
drop if Year==2019
replace Year=2019 if Year==2018
save "${interm_files}/mad_age", replace

import excel "${madrid_files}/MAD_SOCSEC.xlsx", sheet("Formatted") firstrow clear
replace Year=2021 if Year==2020
save "${interm_files}/mad_socsec", replace

import excel "${madrid_files}/MAD_MIGRATION.xlsx", sheet("Formatted") firstrow clear
replace Year=2021 if Year==2020
save "${interm_files}/mad_migration", replace

import excel "${madrid_files}/MAD_NUTS4.xlsx", sheet("conuts4") firstrow clear
save "${interm_files}/nuts4", replace

import excel "${madrid_files}/MAD_pib_hotels.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/pib_hotels", replace

import excel "${madrid_files}/MAD_pib_cap.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/pib_cap", replace

import excel "${madrid_files}/MAD_diseases.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/disease", replace

import excel "${madrid_files}/MAD_entcred.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/entcred", replace

import excel "${madrid_files}/MAD_diseases_respiratory_perc.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/resp_disease", replace

import excel "${madrid_files}/MAD_diseases_respiratory_abs.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/resp_disease_abs", replace

import excel "${madrid_files}/MAD_diseases_respiratory_abs.xlsx", sheet("Formatted_2") firstrow clear
replace Year=2021 if Year==2020
drop if Year==2019
replace Year=2019 if Year==2018 
save "${interm_files}/resp_disease_abs_2", replace

import excel "${madrid_files}/MAD_diseases_infec_para.xlsx", sheet("Formatted") firstrow clear
save "${interm_files}/infec_disease", replace

import excel "${madrid_files}/MAD_ALTITUD.xlsx", sheet("Formatted") firstrow clear

rename MUN MUNSTRING
encode MUNSTRING, gen(MUN)
save "${interm_files}/mad_altitud", replace

use "${interm_files}/Mad_electoral_data", clear
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_unem"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_gender"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_hotelplaces"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_age"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/resp_disease_abs_2"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_socsec"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_migration"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/mad_pop"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/pib_hotels"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/pib_cap"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/disease"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/entcred"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/infec_disease"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/resp_disease"
drop _merge
merge 1:1 GEOCODIGO Year using "${interm_files}/resp_disease_abs"
drop _merge
merge m:1 GEOCODIGO using "${interm_files}/nuts4"
drop _merge
merge m:1 GEOCODIGO using "${interm_files}/mad_altitud"
drop _merge


gen per_socsec=socsec/population
gen per_migration=migration/population
gen socsec_2=socsec/1000

gen per_age_0_20=age_0_20/population
gen per_age_21_35=age_21_35/population
gen per_age_36_50=age_36_50/population
gen per_age_51_65=age_51_65/population
gen per_age_66_100=age_66_100/population

replace Altitud=0 if Year==2019
replace agri_P=0 if Year==2019
replace agri_2=0 if Year==2019
replace agri_3=0 if Year==2019

gen agri_P2=agri_2/POB20
gen agri_P3=agri_3/POB20

replace agri_P2=0 if Year==2019
replace agri_P3=0 if Year==2019

replace Dismad=0 if Year==2019

replace cc_total=0 if Year==2019
replace cc_3m=0 if Year==2019
replace cc_6m=0 if Year==2019
replace cc_2nd=0 if Year==2019
replace cc_3rd=0 if Year==2019

gen cc_total_P=cc_total/population
gen cc_3m_P=cc_3m/population
gen cc_6m_P=cc_6m/population
gen cc_2nd_P=cc_2nd/population
gen cc_3rd_P=cc_3rd/population

drop cumulative_cases_m_P
gen cumulative_cases_m_P=(cumulative_cases/POB20)
replace cumulative_cases_m_P=0 if Year==2019

gen cumcases=ln(cumulative_cases_m_P+1)

gen lcumtot=log(cumulative_cases+1)

gen logcasesP=log(cumcases+1)

gen lognursp=log(nursingplaces_P+1)

gen logalt=log(Altitud+1)



global ivis wind_6m	wind_12m wind_all wind_3m	wind_3mb wind_6mb wind_12mb wind_allb temp_6m temp_12m temp_all temp_3m temp_3mb temp_6mb temp_12mb temp_allb rain_6m rain_12m rain_all rain_3m rain_3mb rain_6mb rain_12mb rain_allb rain_3mc rain_6mc rain_12mc

foreach var in $ivis{
	
	replace `var'=0 if Year==2019
	
}

gen rain_wind_6m=rain_6m*wind_6m
gen temp_rain_6m=temp_6m*rain_6m
gen temp_wind_6m=temp_6m*wind_6m

replace temp_all=0 if Year==2019
replace rain_all=0 if Year==2019
replace wind_all=0 if Year==2019


gen hotelplaces_P=hotelplaces/population

gen dummy=pib_cap/population
drop pib_cap 
rename dummy pib_cap

gen dummy=pib_p_hotels/10000
drop pib_p_hotels
rename dummy pib_p_hotels

egen deciles_pib=xtile( pib_cap ), nq(20)
gen treat=0
replace treat=1 if deciles_pib==1 | deciles_pib==20
replace treat=. if Year!=2021

gen treat_2=0
replace treat_2=1 if deciles_pib==1 | deciles_pib==2 | deciles_pib==19 |deciles_pib==20
replace treat_2=. if Year!=2021


global diss dis_19 dis_18 dis_17 dis_16 dis_15 dis_14 dis_13 dis_12 dis_11 dis_10

foreach dis in $diss{

gen `dis'_p=`dis'/population
gen `dis'_T=0
replace `dis'_T=1 if `dis' > 0
replace `dis'=. if Year!=2021

}

global respdis respdid_2020 respdid_2019 respdid_2018 respdid_2017 respdid_2016 respdid_2015 respdid_2014 respdid_2013 respdid_2012 respdid_2011 respdid_2010

foreach respdis in $respdis{

destring `respdis', gen(`respdis'_num)

}


global infdis infdis_2020 infdis_2019 infdis_2018 infdis_2017 infdis_2016 infdis_2015 infdis_2014 infdis_2013 infdis_2012 infdis_2011 infdis_2010

foreach infdis in $infdis{

gen `infdis'_p=`infdis'/population

}


global arespdiss arespdis_2020 arespdis_2019 arespdis_2018 arespdis_2017 arespdis_2016 arespdis_2015 arespdis_2014 arespdis_2013 arespdis_2012 arespdis_2011 arespdis_2010

foreach arespdis in $arespdiss{

gen `arespdis'_p=`arespdis'/population

}

gen deaths_resp_p=deaths_resp/population

gen mun= GEOCODIGO

keep population per_migration migration per_men per_age_0_20 per_age_21_35 per_age_36_50 per_age_51_65 per_age_66_100 hotelplaces_P nursingplaces_P agri_2 socsec pib_cap D_turnout D_proconf_placebo D_anticonf_placebo D_proconf D_anticonf cumulative_cases_m_P cumcases treat treat_2 pib_p_hotels NUTS4 arespdis_2018_p arespdis_2019_p Altitud Year wind_6m	wind_12m	wind_all	wind_3m	wind_3mb	wind_6mb	wind_12mb	wind_allb temp_6m	temp_12m	temp_all	temp_3m	temp_3mb	temp_6mb	temp_12mb 	temp_allb rain_6m	rain_12m	rain_all	rain_3m	rain_3mb	rain_6mb	rain_12mb	rain_allb temp_3mc	temp_6mc	temp_12mc rain_3mc	rain_6mc	rain_12mc wind_3mc wind_6mc wind_12mc mun proconf anticonf turnout deciles_pib per_socsec agri_3 socsec_2 deaths_resp_p GEOCODIGO NAMEUNIT rain_wind_6m
drop if Year==2017
 
encode mun, gen(mun_s)
rename Year year
xtset mun_s year

gen per_women=1-per_men

global controls population per_migration migration per_women per_age_0_20 per_age_21_35 per_age_36_50 per_age_51_65 per_age_66_100 hotelplaces_P nursingplaces_P per_socsec

foreach control in $controls{
	gen D_`control'=`control'-L2.`control'	
}

drop wind_12m wind_all wind_3m wind_3mb wind_6mb wind_12mb wind_allb wind_3mc wind_12mc wind_6m
drop temp_12m temp_all temp_3m temp_3mb temp_6mb temp_12mb temp_allb temp_3mc temp_12mc temp_6m
drop rain_12m rain_all rain_3m rain_3mb rain_6mb rain_12mb rain_allb rain_3mc rain_12mc rain_6m
drop rain_wind_6m socsec_2 arespdis_2018_p arespdis_2019_p treat_2 socsec_2


label var cumcases "Log Cumulative Cases per person"
label var population "Population"
label var migration "Migration per Person (000's)"
label var hotelplaces_P "Number of Hotel Places per person"
label var nursingplaces_P "Number of Nursing Places per person"
label var agri_2 "Area of ​​agricultural holdings (ha)"
label var Altitud "Altitud"
label var pib_cap "GDP per Person (000's)" 
label var per_men "Percentage of Men"
label var per_age_0_20 "Percentage Aged 0-20"
label var per_age_21_35 "Percentage Aged 21-35"
label var per_age_36_50 "Percentage Aged 36-50"
label var per_age_51_65 "Percentage Aged 51-65"
label var per_age_66_100 "Percentage Aged 66+"
label var treat "Top and Bottom Ventiles"
label var deaths_resp_p "Deaths from Respiratory Diseases per Person"
label var pib_p_hotels "GDP per Person of Host. \& Distr. Serv. (000's)"
label var treat "Top and Bottom Ventiles"
label var D_population "$\Delta$ Population"
label var D_per_women "$\Delta$ Percentage of Women"
label var D_per_age_0_20 "$\Delta$ Percentage Aged 0-20"
label var D_per_age_21_35 "$\Delta$ Percentage Aged 21-35"
label var D_per_age_36_50 "$\Delta$ Percentage Aged 36-50"
label var D_per_age_51_65 "$\Delta$ Percentage Aged 51-65"
label var D_per_age_66_100 "$\Delta$ Percentage Aged 66+"
label var D_per_socsec "$\Delta$ Percentage employed"
label var pib_cap "GDP per person"
label var proconf "Pro-confinement"
label var anticonf "Anti-confinement"
label var D_proconf "$\Delta$ Pro-confinement" 
label var D_anticonf "$\Delta$ Anti-confinement" 
label var D_turnout "$\Delta$ Turnout" 
label var D_proconf_placebo "$\Delta$ Pro-confinement (placebo)" 
label var D_anticonf_placebo "$\Delta$ Anti-confinement (placebo)" 
label var per_socsec "Percentage Employed"
label var socsec "Number Employed"
label var per_migration "Share of Migrants" 
label var cumulative_cases_m_P "Cumulative Cases per person"
label var deciles_pib "GDP (Deciles)"
label var mun "Municipality"
label var mun_s "Municipality (num)"
label var per_women "Share of Women"
label var D_per_migration "$\Delta$ Share of Migrants" 
label var D_migration  "$\Delta$ Number of Migrants" 
label var D_hotelplaces_P  "$\Delta$ Number of Hotel Places per person"
label var D_nursingplaces_P "$\Delta$ Number of Nursing Places per person"





save "${dta}Finaldata_Madrid", replace







